library(ManyEcoEvo)
## Warning: replacing previous import 'purrr::%@%' by 'rlang::%@%' when loading
## 'ManyEcoEvo'
## Warning: replacing previous import 'purrr::flatten_lgl' by 'rlang::flatten_lgl'
## when loading 'ManyEcoEvo'
## Warning: replacing previous import 'purrr::splice' by 'rlang::splice' when
## loading 'ManyEcoEvo'
## Warning: replacing previous import 'purrr::flatten_chr' by 'rlang::flatten_chr'
## when loading 'ManyEcoEvo'
## Warning: replacing previous import 'purrr::flatten_raw' by 'rlang::flatten_raw'
## when loading 'ManyEcoEvo'
## Warning: replacing previous import 'purrr::flatten' by 'rlang::flatten' when
## loading 'ManyEcoEvo'
## Warning: replacing previous import 'purrr::flatten_dbl' by 'rlang::flatten_dbl'
## when loading 'ManyEcoEvo'
## Warning: replacing previous import 'purrr::invoke' by 'rlang::invoke' when
## loading 'ManyEcoEvo'
## Warning: replacing previous import 'purrr::flatten_int' by 'rlang::flatten_int'
## when loading 'ManyEcoEvo'
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
library(rlang)
##
## Attaching package: 'rlang'
##
## The following objects are masked from 'package:purrr':
##
## %@%, flatten, flatten_chr, flatten_dbl, flatten_int, flatten_lgl,
## flatten_raw, invoke, splice
library(performance)
library(see)
library(modelbased)
Create orchard-style plots for both BT and Euc, without removing analyses with extreme weights:
inv_bc_var_weights <- ManyEcoEvo_results %>%
ungroup %>%
filter(exclusion_set == "complete",
publishable_subset == "All",
expertise_subset == "All",
collinearity_subset == "All"
) %>%
hoist(.col = effects_analysis,
"lambda",
.simplify = TRUE,
.transform = unique) %>%
select(exclusion_set,
dataset,
estimate_type,
box_cox_rating_cat,
lambda) %>%
mutate(predictor_means =
map(box_cox_rating_cat, modelbased::estimate_means),
model_data = map(box_cox_rating_cat, ~pluck(.x, "frame") %>%
drop_na() %>%
as_tibble()),
plot_name = paste(exclusion_set, dataset, sep = ", ")) %>%
mutate(model_data = map(model_data,
.f = ~ mutate(.x, PublishableAsIs =
str_replace(PublishableAsIs,
"publishable with ", "") %>%
str_replace("deeply flawed and ", "") %>%
capwords())),
predictor_means = map(predictor_means,
.f = ~ mutate(.x, PublishableAsIs =
str_replace(PublishableAsIs,
"publishable with ", "") %>%
str_replace("deeply flawed and ", "") %>%
capwords())))
## We selected `at = c("PublishableAsIs")`.
## We selected `at = c("PublishableAsIs")`.
inv_bc_var_weights %>%
mutate(plot_name =
str_remove(plot_name, "complete, ") %>%
str_replace_all(., " ", "_") %>%
paste0("_orchard_plot")) %>%
pmap(.l = list(.$model_data, .$predictor_means, .$plot_name),
.f = ~ plot_model_means_orchard(..1,
PublishableAsIs,
..2,
new_order =
c("Unpublishable",
"Major Revision",
"Minor Revision",
"Publishable As Is"),
..3))
## [[1]]
##
## [[2]]
From the above it’s clear that the general pattern is for the higher weighted cases to pull the model means closer to them. In the case of the eucalyptus data, it’s several EXTREME weights that are pulling the model means closer to them. Let’s look at the distributions of weights and their values for the euc dataset Weight Distribution Blue Tit Model:
ManyEcoEvo_results %>%
ungroup %>%
filter(exclusion_set == "complete",
publishable_subset == "All",
expertise_subset == "All",
dataset == "blue tit") %>%
hoist(.col = effects_analysis,
"lambda",
.simplify = TRUE,
.transform = unique) %>%
select(exclusion_set,
dataset,
estimate_type,
box_cox_rating_cat,
lambda, data) %>% pull(box_cox_rating_cat) %>%
pluck(1) %>%
model.frame() %>%
as_tibble() %>%
rename(weights = `(weights)`) %>%
mutate(weights = as.numeric(weights)) %>%
ggplot(aes(x = weights)) +
geom_histogram(bins = 30) +
labs(title = "Distribution of Box-Cox Weights",
x = "Box-Cox Weights",
y = "Frequency") +
theme_minimal() +
theme(legend.position = "none")
Weight Distribution Eucalyptus Model
ManyEcoEvo_results %>%
ungroup %>%
filter(exclusion_set == "complete",
publishable_subset == "All",
expertise_subset == "All",
dataset == "eucalyptus") %>%
hoist(.col = effects_analysis,
"lambda",
.simplify = TRUE,
.transform = unique) %>%
select(exclusion_set,
dataset,
estimate_type,
box_cox_rating_cat,
lambda, data) %>%
pull(box_cox_rating_cat) %>%
pluck(1) %>%
model.frame() %>%
as_tibble() %>%
rename(weights = `(weights)`) %>%
mutate(weights = as.numeric(weights)) %>%
ggplot(aes(x = weights)) +
geom_histogram(bins = 30) +
labs(title = "Distribution of Box-Cox Weights",
x = "Box-Cox Weights",
y = "Frequency") +
theme_minimal() +
theme(legend.position = "none")
Weight Distribution for Eucalyptus model when case with maximum weight removed
ManyEcoEvo_results %>%
ungroup %>%
filter(exclusion_set == "complete",
publishable_subset == "All",
expertise_subset == "All",
dataset == "eucalyptus") %>%
hoist(.col = effects_analysis,
"lambda",
.simplify = TRUE,
.transform = unique) %>%
select(exclusion_set,
dataset,
estimate_type,
box_cox_rating_cat,
lambda, data) %>%
pull(box_cox_rating_cat) %>%
pluck(1) %>%
model.frame() %>%
as_tibble() %>%
rename(weights = `(weights)`) %>%
mutate(weights = as.numeric(weights)) %>%
filter(weights != max(weights)) %>%
ggplot(aes(x = weights)) +
geom_histogram(bins = 30) +
labs(title = "Distribution of Box-Cox Weights",
x = "Box-Cox Weights",
y = "Frequency") +
theme_minimal() +
theme(legend.position = "none")
Let’s see all values of weight
ManyEcoEvo_results %>%
ungroup %>%
filter(exclusion_set == "complete",
publishable_subset == "All",
expertise_subset == "All",
dataset == "eucalyptus") %>%
hoist(.col = effects_analysis,
"lambda",
.simplify = TRUE,
.transform = unique) %>%
select(exclusion_set,
dataset,
estimate_type,
box_cox_rating_cat,
lambda, data) %>%
pull(box_cox_rating_cat) %>%
pluck(1) %>%
model.frame() %>%
as_tibble() %>%
rename(weights = `(weights)`) %>%
mutate(weights = as.numeric(weights)) %>%
pluck("weights", unique, sort) # all weights
## [1] 563686395 563717442 563749404 563792386 565399621
## [6] 565829955 565935996 566268848 567997367 569333108
## [11] 574528639 575196234 575387867 575722251 577600776
## [16] 579285551 579948308 580899020 583278546 584152556
## [21] 585498693 588256091 589433325 591917249 592900308
## [26] 607544276 617833974 622796581 626556210 633218858
## [31] 634782083 639555963 645135519 675024708 679607659
## [36] 679917410 685839966 694551570 731532604 810363823
## [41] 841303906 848538448 872489778 883906458 889910295
## [46] 890972369 919288483 933683584 962902173 1041096068
## [51] 1105126672 1155109828 1175856985 1373998220 1553121885
## [56] 1652915383 1739127212 1859021802 1921555550 1932590106
## [61] 2160580342 2185971274 2284404564 3627724910 4623841569
## [66] 5082015565 5099533477 5127083966 5313360448 5637576773
## [71] 5749335304 5767498603 5986636683 6027266130 7006392797
## [76] 9979626599 11545920175 708161001050
What are the cases with extreme weights? Below we identify the cases with the top 2 maximum weights
extreme_weight_observation <-
ManyEcoEvo_results %>%
ungroup %>%
filter(exclusion_set == "complete",
publishable_subset == "All",
expertise_subset == "All",
dataset == "eucalyptus") %>%
pluck("effects_analysis", 1) %>%
select(study_id, box_cox_abs_deviation_score_estimate, box_cox_var) %>%
mutate(weights = 1/box_cox_var) %>%
# filter(weights == max(weights))
filter(weights > 10000000000)
extreme_weight_observation
## # A tibble: 2 × 4
## study_id box_cox_abs_deviation_score_estimate box_cox_var weights
## <chr> <dbl> <dbl> <dbl>
## 1 Brooklyn-2-2-1 1.48 1.41e-12 708161001050.
## 2 Cassilis-1-1-1 -1.11 8.66e-11 11545920175.
Remove Eucalyptus cases with extreme weights and refit / plot models
refitted_eucalyptus_model <-
ManyEcoEvo_results %>%
ungroup %>%
filter(exclusion_set == "complete",
publishable_subset == "All",
expertise_subset == "All",
dataset == "eucalyptus") %>%
mutate(effects_analysis = map(effects_analysis, ~filter(.x, study_id %nin% extreme_weight_observation$study_id))) %>%
mutate(box_cox_rating_cat = map(effects_analysis, ~fit_boxcox_ratings_cat(
.data = .x,
outcome = box_cox_abs_deviation_score_estimate,
outcome_var = box_cox_var,
interceptless = FALSE
)))
##
## ── Fitting lmer with categorical ratings predictor on box_cox_transformed outcomes ──
##
## boundary (singular) fit: see help('isSingular')
Note, not shown here yet. But when only the top most case is removed, lme4 gives singular fit warning, when I run check_singularity() on the model, it seems OK. When the top two cases are removed, singular fit warning is given by lme4, and check_singularity() returns FALSE, meaning the model is singular. Also, when I check convergence on the model with top 2 cases removed, gradient shifts from ~3.79e^-5 with only case with max weight removed to 0. Seems strange to me that this would occur when both extreme weights are removed… Anyways, I progress with refitting and plotting after removing cases with weights 10000000000 and above (here, there are two). Recreate the plot data for regenerating plots after refitting models
refitted_plot_data<-
refitted_eucalyptus_model %>%
hoist(.col = effects_analysis,
"lambda",
.simplify = TRUE,
.transform = unique) %>%
select(exclusion_set,
dataset,
estimate_type,
box_cox_rating_cat,
lambda) %>%
mutate(predictor_means =
map(box_cox_rating_cat, modelbased::estimate_means),
model_data = map(box_cox_rating_cat, ~pluck(.x, "frame") %>%
drop_na() %>%
as_tibble()),
plot_name = paste(exclusion_set, dataset, sep = ", ")) %>%
mutate(model_data = map(model_data,
.f = ~ mutate(.x, PublishableAsIs =
str_replace(PublishableAsIs,
"publishable with ", "") %>%
str_replace("deeply flawed and ", "") %>%
capwords())),
predictor_means = map(predictor_means,
.f = ~ mutate(.x, PublishableAsIs =
str_replace(PublishableAsIs,
"publishable with ", "") %>%
str_replace("deeply flawed and ", "") %>%
capwords()))) %>%
mutate(plot_name =
str_remove(plot_name, "complete, ") %>%
str_replace_all(., " ", "_") %>%
paste0("_violin_plot"))
## We selected `at = c("PublishableAsIs")`.
Create violin plots for refitted models
refitted_plot_data %>%
pmap(.l = list(.$model_data, .$predictor_means, .$plot_name, .$lambda),
.f = ~ plot_model_means_box_cox_cat(..1,
PublishableAsIs,
..2,
new_order =
c("Unpublishable",
"Major Revision",
"Minor Revision",
"Publishable As Is"),
..3, ..4, back_transform = FALSE))
## [[1]]
Regenerate plot but back-transform to original scale (absolute deviation from meta-analytic mean)
refitted_plot_data %>%
pmap(.l = list(.$model_data, .$predictor_means, .$plot_name, .$lambda),
.f = ~ plot_model_means_box_cox_cat(..1,
PublishableAsIs,
..2,
new_order =
c("Unpublishable",
"Major Revision",
"Minor Revision",
"Publishable As Is"),
..3, ..4, back_transform = TRUE))
## [[1]]
Create orchard-style plots for refitted models:
refitted_plot_data %>%
mutate(plot_name = str_replace(plot_name, "_violin_plot", "_orchard_plot")) %>%
pmap(.l = list(.$model_data, .$predictor_means, .$plot_name),
.f = ~ plot_model_means_orchard(..1,
PublishableAsIs,
..2,
new_order =
c("Unpublishable",
"Major Revision",
"Minor Revision",
"Publishable As Is"),
..3))
## [[1]]
What is causing these extreme values observed in
extreme_weight_observation? Weights seem correlated with
the box-cox transformed deviation score too
ManyEcoEvo_results %>%
ungroup %>%
filter(exclusion_set == "complete",
publishable_subset == "All",
expertise_subset == "All",
dataset == "eucalyptus") %>%
select(effects_analysis) %>%
unnest(effects_analysis) %>%
filter(study_id %in% extreme_weight_observation$study_id) %>%
select(study_id,
Zr,
VZr,
abs_deviation_score_estimate,
box_cox_abs_deviation_score_estimate,
folded_mu_val,
folded_v_val,
box_cox_var,
lambda) %>%
mutate(weights = 1/box_cox_var) %>% knitr::kable()
| study_id | Zr | VZr | abs_deviation_score_estimate | box_cox_abs_deviation_score_estimate | folded_mu_val | folded_v_val | box_cox_var | lambda | weights |
|---|---|---|---|---|---|---|---|---|---|
| Brooklyn-2-2-1 | -4.4681193 | 0.0086957 | 4.3758768 | 1.476168 | 4.3758768 | 0.0086957 | 0 | 5.58e-05 | 708161001050 |
| Cassilis-1-1-1 | 0.2361069 | 0.0030038 | 0.3283493 | -1.113643 | 0.3283493 | 0.0030038 | 0 | 5.58e-05 | 11545920175 |
ManyEcoEvo_results %>%
ungroup %>%
filter(exclusion_set == "complete",
publishable_subset == "All",
expertise_subset == "All") %>%
select(effects_analysis) %>%
unnest(effects_analysis) %>%
unnest(review_data) %>%
mutate(weights = 1/box_cox_var) %>%
write_csv("extreme_weight_analysis_info.csv")
The weights are calculated as the inverse of the
variance of the box-cox transformed absolute deviation scores. The
variance of the box-cox transformed absolute deviation scores
box_cox_var is calculated as the variance of the folded
absolute deviation scores and the folded variance of the absolute
deviation scores The folded absolute deviation scores
folded_mu and the The folded variance of the absolute
deviation scores folded_v are calculated as follows:
box_cox_transform
## function(data, dataset) {
## if(rlang::is_na(data) | rlang::is_null(data)){
## cli::cli_alert_warning(text = glue::glue("Cannot box-cox transform data for",
## paste(names(dplyr::cur_group()),
## dplyr::cur_group(),
## sep = " = ",
## collapse = ", ")))
## result <- NA
## } else{
## cli::cli_h2(glue::glue("Box-cox transforming absolute deviation scores for ", {dataset}))
##
## box_cox_recipe <- recipes::recipe(~.,
## data = select(data,
## starts_with("abs_deviation_score_"))) %>%
## timetk::step_box_cox(everything(),limits = c(0,10)) %>%
## recipes::prep(training = data, retain = TRUE) #estimate lambda + box cox transform vars
##
## if(box_cox_recipe %>%
## recipes::tidy(number = 1) %>% nrow() > 0){
## lambda <- box_cox_recipe %>%
## recipes::tidy(number = 1) %>%
## pull(., lambda) %>%
## `names<-`(., pull(box_cox_recipe %>%
## recipes::tidy(number = 1), terms))
##
## if(!is.null(dataset)){
## cli::cli_alert_info(c(
## "Optimised Lambda used in Box-Cox Transformation of ",
## "{dataset} dataset variables ",
## "is {round(lambda, 4)} for `{names(lambda)}`."
## ))
## }
##
## variance_box_cox <- function(folded_mu, folded_v, lambda){
## variance_bc <- folded_v*(lambda*folded_mu^(lambda-1))^2 # delta method
## return(variance_bc)
## }
##
## # folded abs_dev_score
## folded_mu <- function(abs_dev_score, VZr) {
## mu <- abs_dev_score
## sigma <- sqrt(VZr)
## fold_mu <- sigma * sqrt(2/pi) * exp((-mu^2)/(2 * sigma^2)) + mu * (1 - 2 * pnorm(-mu/sigma))
## fold_mu
## }
##
## # folded VZr
## folded_v <- function(abs_dev_score, VZr) {
## mu <- abs_dev_score
## sigma <- sqrt(VZr)
## fold_mu <- sigma * sqrt(2/pi) * exp((-mu^2)/(2 * sigma^2)) + mu * (1 - 2 * pnorm(-mu/sigma))
## fold_se <- sqrt(mu^2 + sigma^2 - fold_mu^2)
## # adding se to make bigger abs_dev_score
## fold_v <- fold_se^2
## fold_v
## }
##
## Z_colname <- data %>% colnames %>% keep(., str_starts(., "Z"))
## VZ_colname <- data %>% colnames %>% keep(., str_starts(., "VZ"))
## result <- recipes::juice(box_cox_recipe) %>%
## rename_with(.fn = ~ paste0("box_cox_", .x)) %>%
## bind_cols(data, .) %>%
## mutate(folded_mu_val = folded_mu(abs_deviation_score_estimate, !!as.name(VZ_colname)),
## folded_v_val = folded_v(abs_deviation_score_estimate, !!as.name(VZ_colname)),
## box_cox_var = variance_box_cox(folded_mu_val, folded_v_val, lambda[[1]]),
## lambda = lambda[[1]])
##
## } else{
## result <- NA
## cli::cli_alert_warning(text = glue::glue("Lambda cannot be computed."))
## }
##
##
## }
##
## return(result)
##
## }
## <bytecode: 0x1247db970>
## <environment: namespace:ManyEcoEvo>
So it seems we need to reconsider different weight options, but this might mean we need to reconsider returning back to the planned random effects structure, too
filter_args = rlang::exprs(exclusion_set == "complete",
publishable_subset == "All",
expertise_subset == "All",
collinearity_subset == "All")
prepare_ratings_data <- function(effects_analysis){
data_tbl <-
effects_analysis %>%
unnest(cols = c(review_data)) %>%
select(study_id,
TeamIdentifier,
starts_with("box_cox_"),
ReviewerId,
PublishableAsIs,
# lambda,
folded_v_val) %>%
ungroup() %>%
mutate(PublishableAsIs = forcats::fct_relevel(PublishableAsIs,c("deeply flawed and unpublishable",
"publishable with major revision",
"publishable with minor revision",
"publishable as is")),
obs_id = 1:n())
return(data_tbl)
}
# create formulas
base_formula <- rlang::new_formula(expr(box_cox_abs_deviation_score_estimate),
expr(PublishableAsIs))
calc_inv_bc_var <- as_function(~ 1/pull(.x, box_cox_var))
calc_inv_folded_v <- as_function(~ 1/pull(.x, folded_v_val))
no_weights <- NA
weight_formulas <- list(no_weights,
calc_inv_bc_var,
calc_inv_folded_v
) %>%
set_names("no_weights",
"inv_bc_var",
"inv_folded_v")
RE_rev <- expr((1 | ReviewerId))
RE_study <- expr((1 | study_id))
RE_both <- expr(!!RE_rev + !!RE_study)
random_expressions <- list(
RE_rev,
RE_study,
RE_both
) %>% set_names("RE_rev",
"RE_study",
"RE_both")
lmer_wrap <- function(data_tbl, random_effect, weight_form, ..., env = caller_env()){
f <- rlang::new_formula(expr(box_cox_abs_deviation_score_estimate),
expr(PublishableAsIs + !!random_effect), env = env)
weights <- if(is_na(weight_form)) NULL else weight_form(data_tbl)
inject(lme4::lmer(!!f,
data = data_tbl,
weights = weights, ...))
}
all_models <-
ManyEcoEvo_results %>%
ungroup %>%
filter(!!!filter_args) %>%
select(dataset, effects_analysis) %>%
hoist(.col = effects_analysis,
"lambda",
.simplify = TRUE,
.transform = unique) %>%
mutate(model_data = map(effects_analysis, prepare_ratings_data),.keep = "unused") %>%
expand_grid(expand_grid(weight_formulas, random_expressions) %>%
mutate(weight_forms = names(weight_formulas),
random_effect = names(random_expressions)) %>%
unite("model_spec", weight_forms, random_effect, sep = "_") ) %>%
mutate(model = pmap(list(model_data, random_expressions, weight_formulas), lmer_wrap),.keep = "unused") %>%
mutate(singularity = map_lgl(model, performance::check_singularity),
convergence = map_lgl(model, performance::check_convergence))
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## Warning: There were 16 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `model = pmap(list(model_data, random_expressions,
## weight_formulas), lmer_wrap)`.
## Caused by warning in `checkConv()`:
## ! Model is nearly unidentifiable: very large eigenvalue
## - Rescale variables?
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 15 remaining warnings.
estimate_means <-
all_models %>%
filter(singularity == F, convergence == T) %>%
reframe(model = set_names(model, model_spec),
results = map(model, possibly(modelbased::estimate_means, otherwise = NULL), at = "PublishableAsIs"),
results = set_names(results, dataset), dataset = dataset, model_spec = model_spec) %>%
rowwise() %>%
drop_na(results) # model means couldn't be estimated due to convergence issues, drop those models
## Warning: There were 20 warnings in `reframe()`.
## The first warning was:
## ℹ In argument: `results = map(...)`.
## Caused by warning:
## ! Could not recover model data from environment. Please make sure your
## data is available in your workspace.
## Trying to retrieve data from the model frame now.
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 19 remaining warnings.
# evaluate and compare performance for remaining models
model_comparison_results <-
all_models %>% semi_join(estimate_means, by = join_by(dataset, model_spec)) %>%
group_by(dataset) %>%
summarise(model = set_names(model, model_spec) %>% list,
results = map(model, performance::compare_performance, rank = T),
results = set_names(results, unique(dataset)), .groups = "keep")
## Some of the nested models seem to be identical and probably only vary in
## their random effects.
## Some of the nested models seem to be identical and probably only vary in
## their random effects.
model_comparison_results %>% pull(results) %>% map(knitr::kable)
$blue tit
| Name | Model | R2_conditional | R2_marginal | ICC | RMSE | Sigma | AIC_wt | AICc_wt | BIC_wt | Performance_Score |
|---|---|---|---|---|---|---|---|---|---|---|
| no_weights_RE_rev | lmerMod | 0.0906698 | 0.0074686 | 0.0838273 | 0.4827981 | 0.4979737 | 1 | 1 | 1 | 1.0000000 |
| inv_folded_v_RE_rev | lmerMod | 0.0004711 | 0.0000514 | 0.0004198 | 0.5026704 | 13.1865065 | 0 | 0 | 0 | 0.1000229 |
| inv_bc_var_RE_rev | lmerMod | 0.0007673 | 0.0001311 | 0.0006363 | 0.5822505 | 11.8297761 | 0 | 0 | 0 | 0.0154436 |
$eucalyptus
| Name | Model | R2_conditional | R2_marginal | ICC | RMSE | Sigma | AIC_wt | AICc_wt | BIC_wt | Performance_Score |
|---|---|---|---|---|---|---|---|---|---|---|
| inv_bc_var_RE_study | lmerMod | 0.9997595 | 0.0000000 | 0.9997595 | 0.000000 | 8.240200e-03 | 1 | 1 | 1 | 0.8750000 |
| no_weights_RE_rev | lmerMod | 0.1318616 | 0.0124202 | 0.1209435 | 1.017341 | 1.060566e+00 | 0 | 0 | 0 | 0.3217924 |
| inv_folded_v_RE_rev | lmerMod | 0.0005661 | 0.0000182 | 0.0005479 | 1.126659 | 1.968092e+01 | 0 | 0 | 0 | 0.1563435 |
| inv_bc_var_RE_rev | lmerMod | 0.0000000 | 0.0000000 | 0.0000000 | 1.499387 | 4.681748e+04 | 0 | 0 | 0 | 0.0000000 |
model_comparison_plots <-
model_comparison_results %>%
pull(results, "dataset") %>% map(plot)
model_comparison_plots[[1]] + ggtitle(names(model_comparison_plots)[[1]])
model_comparison_plots[[2]] + ggtitle(names(model_comparison_plots)[[2]])
note that the Performance scores are relative to best performing model for each dataset
model_means_results <-
estimate_means %>%
left_join(model_comparison_results %>% select(-model) %>% unnest(results), by = join_by("dataset", "model_spec" == "Name")) %>%
mutate(label = paste(dataset, model_spec, sep = ".")) %>%
arrange(dataset, desc(Performance_Score)) %>%
select(Performance_Score, dataset, model_spec, results) %>%
mutate(label = paste(dataset, model_spec, sep = "."))
model_means_results %>%
filter(dataset == "blue tit") %>%
pull(results, name = model_spec) %>%
map(., plot, at = "PublishableAsIs") %>%
map2(., names(.), ~ .x + labs(subtitle = as_label(.y), title = NULL) +
see::theme_lucid() + theme(axis.text.x = element_text(angle = 50, hjust = 1)))
## Warning: Could not recover model data from environment. Please make sure your
## data is available in your workspace.
## Trying to retrieve data from the model frame now.
## Warning: Could not recover model data from environment. Please make sure your
## data is available in your workspace.
## Trying to retrieve data from the model frame now.
## Warning: Could not recover model data from environment. Please make sure your
## data is available in your workspace.
## Trying to retrieve data from the model frame now.
## $no_weights_RE_rev
##
## $inv_folded_v_RE_rev
##
## $inv_bc_var_RE_rev
model_means_results %>%
pull(results, name = label) %>%
map(knitr::kable)
$blue tit.no_weights_RE_rev
| PublishableAsIs | Mean | SE | CI_low | CI_high |
|---|---|---|---|---|
| deeply flawed and unpublishable | -1.108483 | 0.1126669 | -1.330045 | -0.8869225 |
| publishable with major revision | -1.299960 | 0.0477652 | -1.394240 | -1.2056797 |
| publishable with minor revision | -1.300669 | 0.0396926 | -1.379313 | -1.2220256 |
| publishable as is | -1.240809 | 0.0695605 | -1.377695 | -1.1039236 |
$blue tit.inv_folded_v_RE_rev
| PublishableAsIs | Mean | SE | CI_low | CI_high |
|---|---|---|---|---|
| deeply flawed and unpublishable | -1.395268 | 0.1416273 | -1.672852 | -1.117683 |
| publishable with major revision | -1.480845 | 0.0649463 | -1.608137 | -1.353552 |
| publishable with minor revision | -1.428534 | 0.0532131 | -1.532830 | -1.324239 |
| publishable as is | -1.180024 | 0.0846843 | -1.346003 | -1.014046 |
$blue tit.inv_bc_var_RE_rev
| PublishableAsIs | Mean | SE | CI_low | CI_high |
|---|---|---|---|---|
| deeply flawed and unpublishable | -0.9522328 | 0.1183777 | -1.1842488 | -0.7202168 |
| publishable with major revision | -1.1493970 | 0.0587054 | -1.2644574 | -1.0343366 |
| publishable with minor revision | -1.0493725 | 0.0500971 | -1.1475611 | -0.9511839 |
| publishable as is | -0.7160750 | 0.0671234 | -0.8476345 | -0.5845154 |
$eucalyptus.inv_bc_var_RE_study
| PublishableAsIs | Mean | SE | CI_low | CI_high |
|---|---|---|---|---|
| deeply flawed and unpublishable | -2.611111 | 0.0620347 | -2.734914 | -2.487308 |
| publishable with major revision | -2.611111 | 0.0620347 | -2.734914 | -2.487308 |
| publishable with minor revision | -2.611111 | 0.0620347 | -2.734914 | -2.487308 |
| publishable as is | -2.611111 | 0.0620347 | -2.734914 | -2.487308 |
$eucalyptus.no_weights_RE_rev
| PublishableAsIs | Mean | SE | CI_low | CI_high |
|---|---|---|---|---|
| deeply flawed and unpublishable | -2.657022 | 0.2702666 | -3.190059 | -2.123985 |
| publishable with major revision | -2.366884 | 0.1248189 | -2.613713 | -2.120055 |
| publishable with minor revision | -2.646360 | 0.1093437 | -2.863306 | -2.429414 |
| publishable as is | -2.604804 | 0.1691851 | -2.938768 | -2.270839 |
$eucalyptus.inv_folded_v_RE_rev
| PublishableAsIs | Mean | SE | CI_low | CI_high |
|---|---|---|---|---|
| deeply flawed and unpublishable | -3.312195 | 0.2585561 | -3.818956 | -2.805434 |
| publishable with major revision | -2.968746 | 0.1253142 | -3.214357 | -2.723135 |
| publishable with minor revision | -3.016617 | 0.1085725 | -3.229415 | -2.803819 |
| publishable as is | -3.084911 | 0.1624810 | -3.403368 | -2.766454 |
$eucalyptus.inv_bc_var_RE_rev
| PublishableAsIs | Mean | SE | CI_low | CI_high |
|---|---|---|---|---|
| deeply flawed and unpublishable | -0.5121906 | 0.3861934 | -1.269116 | 0.2447346 |
| publishable with major revision | -1.0030146 | 0.2980537 | -1.587189 | -0.4188402 |
| publishable with minor revision | -2.2246690 | 0.2987831 | -2.810273 | -1.6390648 |
| publishable as is | -2.7559003 | 0.3383141 | -3.418984 | -2.0928169 |
model_means_results %>%
filter(dataset == "eucalyptus") %>%
pull(results, name = model_spec) %>%
map(., plot, at = "PublishableAsIs") %>%
map2(., names(.), ~ .x + labs(subtitle = as_label(.y), title = NULL) +
see::theme_lucid() + theme(axis.text.x = element_text(angle = 50, hjust = 1)))
## Warning: Could not recover model data from environment. Please make sure your
## data is available in your workspace.
## Trying to retrieve data from the model frame now.
## Warning: Could not recover model data from environment. Please make sure your
## data is available in your workspace.
## Trying to retrieve data from the model frame now.
## Warning: Could not recover model data from environment. Please make sure your
## data is available in your workspace.
## Trying to retrieve data from the model frame now.
## Warning: Could not recover model data from environment. Please make sure your
## data is available in your workspace.
## Trying to retrieve data from the model frame now.
## $inv_bc_var_RE_study
##
## $no_weights_RE_rev
##
## $inv_folded_v_RE_rev
##
## $inv_bc_var_RE_rev
filter_args <- discard_at(filter_args, length(filter_args)) %>%
list(expr(dataset == "blue tit")) %>% list_flatten()
bt_co_v_all <-
ManyEcoEvo_results %>%
ungroup %>%
filter(!!!filter_args) %>%
select(dataset, collinearity_subset, effects_analysis) %>%
mutate(model_data = map(effects_analysis, prepare_ratings_data),.keep = "unused") %>%
expand_grid(expand_grid(weight_formulas, random_expressions) %>%
mutate(weight_forms = names(weight_formulas),
random_effect = names(random_expressions)) %>%
unite("model_spec", weight_forms, random_effect, sep = "_") ) %>%
mutate(model = pmap(list(model_data, random_expressions, weight_formulas), lmer_wrap),.keep = "unused") %>%
mutate(singularity = map_lgl(model, performance::check_singularity),
convergence = map_lgl(model, performance::check_convergence))
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## Warning: There were 17 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `model = pmap(list(model_data, random_expressions,
## weight_formulas), lmer_wrap)`.
## Caused by warning in `checkConv()`:
## ! Model is nearly unidentifiable: very large eigenvalue
## - Rescale variables?
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 16 remaining warnings.
bt_co_v_reduced <-
bt_co_v_all %>%
filter(singularity == F, convergence == T) %>%
group_by(collinearity_subset, model_spec) %>%
reframe(model = set_names(model, model_spec),
results = map(model, possibly(modelbased::estimate_means, otherwise = NULL), at = "PublishableAsIs"),
results = set_names(results, collinearity_subset), collinearity_subset, model_spec) %>%
rowwise() %>%
drop_na(results)
## Warning: There were 19 warnings in `reframe()`.
## The first warning was:
## ℹ In argument: `results = map(...)`.
## ℹ In group 1: `collinearity_subset = "All"` and `model_spec =
## "inv_bc_var_RE_rev"`.
## Caused by warning:
## ! Could not recover model data from environment. Please make sure your
## data is available in your workspace.
## Trying to retrieve data from the model frame now.
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 18 remaining warnings.
bt_perf <-
bt_co_v_all %>% semi_join(bt_co_v_reduced, by = join_by(collinearity_subset, model_spec)) %>%
group_by(collinearity_subset) %>%
summarise(model = set_names(model, model_spec) %>% list,
.groups = "keep") %>%
mutate( results = map(model, performance::compare_performance, rank = T),
results = set_names(results, unique(collinearity_subset)))
## Some of the nested models seem to be identical and probably only vary in
## their random effects.
## Some of the nested models seem to be identical and probably only vary in
## their random effects.
bt_perf %>%
pull(results, "collinearity_subset") %>%
map(plot)
## $All
##
## $collinearity_removed
bt_perf %>% pull(results) %>% map(knitr::kable)
$All
| Name | Model | R2_conditional | R2_marginal | ICC | RMSE | Sigma | AIC_wt | AICc_wt | BIC_wt | Performance_Score |
|---|---|---|---|---|---|---|---|---|---|---|
| no_weights_RE_rev | lmerMod | 0.0906698 | 0.0074686 | 0.0838273 | 0.4827981 | 0.4979737 | 1 | 1 | 1 | 1.0000000 |
| inv_folded_v_RE_rev | lmerMod | 0.0004711 | 0.0000514 | 0.0004198 | 0.5026704 | 13.1865065 | 0 | 0 | 0 | 0.1000229 |
| inv_bc_var_RE_rev | lmerMod | 0.0007673 | 0.0001311 | 0.0006363 | 0.5822505 | 11.8297761 | 0 | 0 | 0 | 0.0154436 |
$collinearity_removed
| Name | Model | R2_conditional | R2_marginal | ICC | RMSE | Sigma | AIC_wt | AICc_wt | BIC_wt | Performance_Score |
|---|---|---|---|---|---|---|---|---|---|---|
| no_weights_RE_rev | lmerMod | 0.1366284 | 0.0114632 | 0.1266166 | 0.2828712 | 0.2947137 | 0.0587685 | 0.0587685 | 0.0587685 | 0.6484142 |
| inv_bc_var_RE_rev | lmerMod | 0.0004195 | 0.0000167 | 0.0004028 | 0.3020777 | 5.0206226 | 0.9412315 | 0.9412315 | 0.9412315 | 0.4124799 |
| inv_folded_v_RE_rev | lmerMod | 0.0005121 | 0.0000124 | 0.0004997 | 0.3022892 | 6.9375939 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0001809 |
bt_mod_means_res <-
bt_co_v_reduced %>%
left_join(bt_perf %>% select(-model) %>% unnest(results), by = join_by("collinearity_subset", "model_spec" == "Name")) %>%
mutate(label = paste(collinearity_subset, model_spec, sep = ".")) %>%
arrange(collinearity_subset, desc(Performance_Score)) %>%
select(Performance_Score, collinearity_subset, model_spec, results) %>%
mutate(label = paste(collinearity_subset, model_spec, sep = "."))
bt_mod_means_res %>%
pull(results, name = label) %>%
map(knitr::kable)
$All.no_weights_RE_rev
| PublishableAsIs | Mean | SE | CI_low | CI_high |
|---|---|---|---|---|
| deeply flawed and unpublishable | -1.108483 | 0.1126669 | -1.330045 | -0.8869225 |
| publishable with major revision | -1.299960 | 0.0477652 | -1.394240 | -1.2056797 |
| publishable with minor revision | -1.300669 | 0.0396926 | -1.379313 | -1.2220256 |
| publishable as is | -1.240809 | 0.0695605 | -1.377695 | -1.1039236 |
$All.inv_folded_v_RE_rev
| PublishableAsIs | Mean | SE | CI_low | CI_high |
|---|---|---|---|---|
| deeply flawed and unpublishable | -1.395268 | 0.1416273 | -1.672852 | -1.117683 |
| publishable with major revision | -1.480845 | 0.0649463 | -1.608137 | -1.353552 |
| publishable with minor revision | -1.428534 | 0.0532131 | -1.532830 | -1.324239 |
| publishable as is | -1.180024 | 0.0846843 | -1.346003 | -1.014046 |
$All.inv_bc_var_RE_rev
| PublishableAsIs | Mean | SE | CI_low | CI_high |
|---|---|---|---|---|
| deeply flawed and unpublishable | -0.9522328 | 0.1183777 | -1.1842488 | -0.7202168 |
| publishable with major revision | -1.1493970 | 0.0587054 | -1.2644574 | -1.0343366 |
| publishable with minor revision | -1.0493725 | 0.0500971 | -1.1475611 | -0.9511839 |
| publishable as is | -0.7160750 | 0.0671234 | -0.8476345 | -0.5845154 |
$collinearity_removed.no_weights_RE_rev
| PublishableAsIs | Mean | SE | CI_low | CI_high |
|---|---|---|---|---|
| deeply flawed and unpublishable | -0.9413372 | 0.0715214 | -1.081986 | -0.8006889 |
| publishable with major revision | -1.0699941 | 0.0316932 | -1.132567 | -1.0074213 |
| publishable with minor revision | -1.0952845 | 0.0263067 | -1.147430 | -1.0431388 |
| publishable as is | -1.0858540 | 0.0457896 | -1.175984 | -0.9957241 |
$collinearity_removed.inv_bc_var_RE_rev
| PublishableAsIs | Mean | SE | CI_low | CI_high |
|---|---|---|---|---|
| deeply flawed and unpublishable | -0.9801828 | 0.0633910 | -1.104427 | -0.8559387 |
| publishable with major revision | -1.0229691 | 0.0294880 | -1.080764 | -0.9651738 |
| publishable with minor revision | -1.0134392 | 0.0228057 | -1.058138 | -0.9687408 |
| publishable as is | -0.9607954 | 0.0394182 | -1.038054 | -0.8835371 |
$collinearity_removed.inv_folded_v_RE_rev
| PublishableAsIs | Mean | SE | CI_low | CI_high |
|---|---|---|---|---|
| deeply flawed and unpublishable | -1.178058 | 0.0776523 | -1.330254 | -1.025863 |
| publishable with major revision | -1.207697 | 0.0371832 | -1.280575 | -1.134820 |
| publishable with minor revision | -1.192916 | 0.0300419 | -1.251797 | -1.134035 |
| publishable as is | -1.129077 | 0.0506755 | -1.228399 | -1.029755 |
bt_mod_means_res %>%
pull(results, name = label) %>%
map(., plot, at = "PublishableAsIs") %>%
map2(., names(.), ~ .x + labs(subtitle = as_label(.y), title = NULL) +
see::theme_lucid() + theme(axis.text.x = element_text(angle = 50, hjust = 1)))
## Warning: Could not recover model data from environment. Please make sure your
## data is available in your workspace.
## Trying to retrieve data from the model frame now.
## Warning: Could not recover model data from environment. Please make sure your
## data is available in your workspace.
## Trying to retrieve data from the model frame now.
## Warning: Could not recover model data from environment. Please make sure your
## data is available in your workspace.
## Trying to retrieve data from the model frame now.
## Warning: Could not recover model data from environment. Please make sure your
## data is available in your workspace.
## Trying to retrieve data from the model frame now.
## Warning: Could not recover model data from environment. Please make sure your
## data is available in your workspace.
## Trying to retrieve data from the model frame now.
## Warning: Could not recover model data from environment. Please make sure your
## data is available in your workspace.
## Trying to retrieve data from the model frame now.
\(All.no_weights_RE_rev
<img
src="investigate_weights_files/figure-html/unnamed-chunk-23-1.png"
width="672" />\)All.inv_folded_v_RE_rev
\(All.inv_bc_var_RE_rev
<img
src="investigate_weights_files/figure-html/unnamed-chunk-23-3.png"
width="672"
/>\)collinearity_removed.no_weights_RE_rev
\(collinearity_removed.inv_bc_var_RE_rev
<img
src="investigate_weights_files/figure-html/unnamed-chunk-23-5.png"
width="672"
/>\)collinearity_removed.inv_folded_v_RE_rev